Load in CSV data

shipibo_age_interval <- 2
spanish_age_interval <- 1

naming_data <- read_csv("data/naming_colors_participants.csv") %>%
  left_join(read_csv("data/naming_colors_data.csv"), by = 'subj')

grouping_data <- read_csv("data/grouping_colors_participants.csv") %>%
  left_join(read_csv("data/grouping_colors_data.csv"), by = 'subj')

shipibo_child_data <- read_csv("data/shipibo_children_colors_participants.csv") %>%
  left_join(read_csv("data/shipibo_children_colors_data.csv"), by = 'subj')

spanish_child_data <- read_csv("data/spanish_children_colors_participants.csv") %>%
  left_join(read_csv("data/spanish_children_colors_data.csv"), by = 'subj')

color_chip_data <- read_csv("data/wcs_measures.csv", skip = 1)

shipibo_graph_colors <- c(
  'Ambi' = '#874A8C',
  'Ami' = '#76296E',
  'Barin Poi' = '#6D6212',
  'Bexnan' = '#B6D744',
  'Chexe' = '#81C147',
  'Chimapo' = '#003459',
  'Emo' = '#007177',
  'Jimi' = '#822158',
  'Joshin' = '#BC1E47',
  'Joxo' = '#DFE6F0',
  'Kari' = '#571848',
  'Kasho' = '#F07000',
  'Keskiti' = '#E56F92',
  'Koin' = '#50491D',
  'Kononbi' = '#503B87',
  'Konron' = '#BB8F00',
  'Koro' = '#7B7B7B',
  'Mai' = '#7F5A21',
  'Mandi' = '#005637',
  'Manxan' = '#FEBBA1',
  'Maxe' = '#DC4800',
  'Nai' = '#19A2C2',
  'Oxne' = '#66BCC9',
  'Panshin' = '#EDC800',
  'Pasna' = '#D3C5DF',
  'Paxna' = '#EC99A2',
  'Pei' = '#69C360',
  'Pene' = '#55471E',
  'Poa' = '#7E4E94',
  'Ranchesh' = '#4A2347',
  'Tena' = '#C5D500',
  'Yame' = '#666412',
  'Yankon' = '#00A79E',
  'Wiso' = '#272727',
  'Xena' = '#D4799C',
  'Xexe' = '#9769AE',
  'Xo' = '#3A6E14',
  'Spanish Term' = '#FF6E00'
)

spanish_graph_colors <- c(
  'Amarillo' = '#FFD416',
  'Azul' = '#337DCE',
  'Blanco' = '#F7F7F7',
  'Celeste' = '#74DFF7', 
  'Gris' = '#979997', 
  'Joshin' = '#BC1E47', 
  'Joxo' = '#DFE6F0', 
  'Kari' = '#571848', 
  'Marron' = '#9E5E22', 
  'Morado' = '#B175F9', 
  'Nai' = '#19A2C2', 
  'Naranja' = '#FF6E00',
  'Oshne' = '#66BCC9', 
  'Panshin' = '#EDC800', 
  'Plomo' = '#848484', 
  'Pei' = '#69C360', 
  'Poa' = '#7E4E94', 
  'Rojo' = '#E03D28', 
  'Rosa' = '#FF8C9D', 
  'Verde' = '#61E27B', 
  'Wiso' = '#272727', 
  'Yankon' = '#00A79E'
  )

shipibo_chip_set <- read.csv(text = "shipibo, spanish, munsell_code, chip_id
Joshin, rojo, G3, 245
Pei/Xo, verde, G18, 234
Panshin, amarillo, C9, 297
Huiso, negro, J1/I0, 312
Joxo, blanco, A/B0, 274
Nai, celeste, E29, 1
Ami/Pua, morado, H36, 325
Barin poi, mierda sol, F12, 320")

spanish_chip_set <- read.csv(text = "spanish, munsell_code, chip_id
Blanco, A/B0, 274
Verde, G18, 234
Rojo, G3, 245
Amarillo, C9, 297
Azul, F30, 291
Negro, J1/I0, 312
Naranja, E4, 121
Gris, F0, 46
Morado, H36, 325
Marron, G5, 266
Rosa, F39, 65")

string_spelling_list <- "`Ami` = c('ami'), `Ambi` = c('ambi'), `Barin Poi` = c('barin pui', 'barrin pui', 'barrinpui', 'pui', 'barin poi', 'barrin poi', 'bavrinpui*', 'barri'), `Bexnan` = c('berrnan', 'bexna', 'bexnan'), `Kari` = c('cari', 'carri', 'kari', 'karri'), `Chexe` = c('chese', 'chexe'), `Chimapo` = c('chimapu'), `Emo` = c('emu'), `Jimi` = c('jimi'), `Jisa` = c('jisa'), `Joshin` = c('joshin', 'joxin', 'toshin'), `Joxo` = c('josho', 'joxo'), `Kasho` = c('kashos'), `Keskiti` = c('kex keti'), `Koin` = c('kuin'), `Kononbi` = c('kunumbi'), `Konron` = c('korrum', 'kumrrum', 'kunrrum'), `Koro` = c('coro'), `Mai` = c('mai'), `Mandi` = c('mandi'), `Manxan` = c('manrran', 'manshan', 'manxam', 'manxan', 'maxan', 'maxna'), `Maxe` = c('maxe'), `Nai` = c('nai', 'nia'), `Oxne` = c('oshne'), `Pei` = c('pei'), `Poa` = c('pua'), `Pene` = c('pene'), `Panshin` = c('panshin'), `Pasna` = c('paxsna', 'pasna'), `Paxna` = c('parrna', 'paxna'), `Ranchesh` = c('ranchex'), `Spanish Term` = c('rojo', 'blanco', 'verde', 'amarillo', 'celeste', 'negro', 'morado', 'azul', 'marron', 'bioleta', 'verdesito', 'carne', 'naranjada', 'naranjado', 'amarilla', 'agua', 'agur', 'uva color*', 'violeta', 'pasto payota', 'naranja', 'chocolate', 'rosado', 'rosada', 'narranxa', 'anaranjado', 'coral', 'cerde', 'gris', 'oscuro', 'lila', 'azu', 'color cielo', 'cielo'), `Tena` = c('tena'), `Wiso` = c('wiso'), `Xena` = c('xena'), `Xo` = c('xo'), `Xexe` = c('xexe', 'xexi'), `Yame` = c('rayame', 'yame'), `Yankon` = c('rayanko', 'yankom', 'yankon', 'yankum', 'yankun', 'yankontani', 'yakon', 'yakun', 'yankoncha'), `NA` = c(NA)"

spelling_list <- eval(parse(text = paste0("c(",string_spelling_list,")")))

naming_data %<>%
  mutate(color_cat = ifelse(is.na(color_cat), first_response, color_cat)) %>%
  mutate(color_cat = ifelse(color_cat %in% unlist(spelling_list), color_cat, NA)) %>%
  mutate(color_cat = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list, "forcats::fct_collapse(color_cat, x)")))
         )

grouping_data %<>%
  mutate(`nombre del grupo` = ifelse(`nombre del grupo` %in% unlist(spelling_list), 
                                     `nombre del grupo`, NA)) %>%
  mutate(`nombre del grupo` = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list, "forcats::fct_collapse(`nombre del grupo`, x)")))
         )

color_chip_data %<>%
  mutate(hex = colorspace::hex(
    colorspace::LAB(color_chip_data$`L*`, color_chip_data$`a*`, 
                    color_chip_data$`b*`, color_chip_data$`#cnum`), fixup = T))

Which terms appear to be basic and commonly used?

naming_data_profusion <- naming_data %>%
  group_by(subj, color_cat) %>%
  summarise(n = n()) %>%
  group_by(color_cat) %>%
  spread(subj, n, fill = 0) %>%
  gather(key = 'subj', value = 'n', -color_cat) %>%
  summarise(`% of Subjects Who Used the Term` = 100*sum(n > 0)/n(), 
            `Mean % of Chips in Set Labeled` = 100*mean(n)/165) %>%
  dplyr::rename(`Color Term` = color_cat)

naming_list <- as.character(na.omit(filter(naming_data_profusion, `% of Subjects Who Used the Term` > 50 & !is.na(`Color Term`))$`Color Term`))

datatable(naming_data_profusion, rownames = FALSE)

In the naming task with 165 color chips, commonly used terms include:

num_groups <- grouping_data %>%
  filter(task == 1) %>%
  group_by(subj) %>%
  summarise(`# of Groups` = n_distinct(`nombre del grupo`)) %>%
  ungroup() %>%
  summarise(`Avg # of Groups` = mean(`# of Groups`), 
            `Min # of Groups` = min(`# of Groups`),
            `Max # of Groups` = max(`# of Groups`))

grouping_data_profusion <- grouping_data %>%
  filter(task == 1) %>%
  group_by(subj, `nombre del grupo`) %>%
  summarise(`cuantas tarjetas` = mean(`cuantas tarjetas`)) %>%
  group_by(`nombre del grupo`) %>%
  spread(subj, `cuantas tarjetas`, fill = 0) %>%
  gather(key = 'subj', value = 'n', -`nombre del grupo`) %>%
  summarise(`% of Subjects Who Used the Term` = 100*sum(n > 0)/n(), 
            `Mean % of Chips in Set Labeled` = 100*mean(n)/60) %>%
  dplyr::rename(`Color Term` = `nombre del grupo`)

grouping_list <- as.character(na.omit(filter(grouping_data_profusion, `% of Subjects Who Used the Term` > 50 & !is.na(`Color Term`))$`Color Term`))

datatable(grouping_data_profusion, rownames = FALSE)

In the grouping task with 60 chips, subjects usually create between 4-7 groups and mostly use terms like:

For each color chip, how many adults label it with the same term?

consensus <- 75

naming_consensus <- naming_data %>%
  select(subj, chip_id, color_cat) %>%
  mutate(set = ifelse((chip_id %% 2) == 0, 'even', 'odd')) %>%
  split(.$set) %>%
  map_df(function(x) {
    x %>%
    group_by(chip_id, color_cat) %>%
    summarise(n = n()) %>%
    group_by(chip_id) %>%
    mutate(perc = 100*n/sum(n)) %>%
    select(-n)
  }) %>%
  arrange(chip_id) %>%
  rename(`Chip ID` = chip_id, `Color Term` = color_cat, `% of Subjects` = perc)

datatable(naming_consensus %>%
            spread(`Color Term`, `% of Subjects`, fill = 0), 
  rownames = FALSE, fillContainer = TRUE)
focal_terms <- pander::p(as.character(
  unique(filter(naming_consensus,`% of Subjects` >= consensus)$`Color Term`)), 
  wrap = '', copula = ', and ')

color_chip_hexes <- color_chip_data[, c('#cnum', 'hex')]


highest_chips <- (naming_consensus %>% group_by(`Color Term`) %>%
  filter(`% of Subjects` >= consensus & `% of Subjects` == max(`% of Subjects`)))$`Chip ID`

agreed_chips <- naming_consensus %>%
  group_by(`Color Term`) %>%
  filter(`% of Subjects` >= consensus) %>%
  arrange(`Color Term`, `Chip ID`) %>%
  left_join(color_chip_hexes,
            by = c("Chip ID" = "#cnum")) %>%
  dplyr::rename(`Hex Code` = hex) %>%
  mutate(highest_chips = ifelse(`Chip ID` %in% highest_chips, 1, 0))


datatable(agreed_chips, rownames = FALSE,
          options=list(columnDefs = list(list(
            visible=FALSE, targets=c(grep('highest_chips', names(agreed_chips))-1))))) %>%
  formatStyle('highest_chips', target = 'row',
              fontWeight = styleEqual(c(0,1), c('normal','bold'))) %>%
  formatStyle(columns = "Hex Code",
              background = styleEqual(agreed_chips$`Hex Code`, agreed_chips$`Hex Code`))

The only categories with chips that reach a high level of consensus appear to be Yankon, Joshin, Panshin, Joxo, and Wiso

highest_consenus <- naming_consensus %>%
  group_by(`Chip ID`) %>%
  filter(`% of Subjects` == max(`% of Subjects`)) %>%
  left_join(color_chip_data, by = c("Chip ID" = "#cnum")) %>%
  mutate(`Spanish Term` = ifelse(`Color Term` == 'Spanish Term','Spanish','Shipibo'))

consensus_plot <- ggplot(highest_consenus, 
                         aes(x = H, y = factor(V), 
                             colour = `Color Term`, size = `% of Subjects`, 
                             shape = `Spanish Term`)) + 
    geom_point() + 
    scale_size(range = c(0, 2.5)) + 
    scale_shape_manual(name = 'Spanish Term', 
                       values = c('Spanish' = 17, 'Shipibo' = 16)) +
    scale_colour_manual(name = "Color Term",values = shipibo_graph_colors) +
    scale_y_discrete(limits = rev(levels(factor(highest_consenus$V)))) +
    theme_bw()

Original plot displaying 330 chips by their V and H coordinates

Is there a similar amount of consensus on labeling between children and adults (in Shipibo)?

shipibo_1st_response <- shipibo_child_data %>%
  mutate(age = ifelse(is.na(age), as.numeric(as.character(edad)), as.numeric(as.character(age)))) %>%
  filter(task == 1) %>%
  mutate(response_1 = ifelse(response_1 %in% unlist(spelling_list), 
                                     response_1, NA)) %>%
  mutate(response_1 = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list,
                                                      "forcats::fct_collapse(response_1, x)")))
         ) %>%
  mutate( age_ints = round(age/shipibo_age_interval)*shipibo_age_interval) %>%
  select(subj, age, age_ints, prompt, response_1) %>%
  split(.$age_ints) %>%
  map_df(function(x) {
    x %>%
      mutate(response_1 = as.character(response_1)) %>%
      spread(prompt, response_1, fill = 'No Response') %>%
      gather(key = 'prompt', value = 'response', -subj, -age, -age_ints) %>%
      group_by(age_ints, prompt, response) %>%
      summarise(n = n()) %>%
      group_by(age_ints, prompt) %>%
      mutate(perc = 100*n/sum(n), n_total = sum(n))
  }) %>% ungroup() %>%
  mutate(prompt = as.numeric(as.character(forcats::fct_collapse(prompt,
                                        `1` = c('celeste'),
                                        `234` = c('verde'),
                                        `245` = c('rojo'),
                                        `274` = c('blanco'),
                                        `297` = c('amarillo'),
                                        `312` = c('negro'),
                                        `320` = c('mierda sol'),
                                        `325` = c('morado'))))) %>%
  left_join(color_chip_hexes,
            by = c("prompt" = "#cnum")) %>%
  rename(Age = age_ints, `Chip ID` = prompt, `Color Term` = response, 
         `% of Subjects` = perc, `Hex Code` = hex) %>%
  filter(n_total >= 4)


adult_naming <- naming_consensus %>%
  group_by(`Color Term`) %>%
  mutate(Age = 18) %>%
  arrange(`Chip ID`, `Color Term`) %>%
  left_join(color_chip_hexes,
            by = c("Chip ID" = "#cnum")) %>%
  filter(`Chip ID` %in% shipibo_chip_set$chip_id & !is.na(`Color Term`)) %>%
  dplyr::rename(`Hex Code` = hex)

naming_data_combined <- bind_rows(shipibo_1st_response, adult_naming)


term_prototypes <- naming_consensus %>%
  group_by(`Color Term`) %>%
  dplyr::arrange(`Color Term`, desc(`% of Subjects`)) %>%
  slice(1:3) %>%
  left_join(color_chip_hexes,
            by = c("Chip ID" = "#cnum")) %>%
  dplyr::rename(`Hex Code` = hex)

shipibo_chip_set_data <- color_chip_data %>% 
  filter(`#cnum` %in% shipibo_chip_set$chip_id) %>%
  select(`#cnum`, `L*`, `a*`, `b*`, hex) %>%
  arrange(`#cnum`) %>%
  rename(`Chip ID` = `#cnum`, `Hex Code` = hex)

datatable(shipibo_chip_set_data, rownames = FALSE) %>%
  formatStyle(columns = "Hex Code",
              background = styleEqual(shipibo_chip_set_data$`Hex Code`, shipibo_chip_set_data$`Hex Code`))

Spanish term as a dotted line?

p <- ggplot(filter(naming_data_combined, Age < 18 & `Color Term` != 'Spanish Term'), 
       aes(x = Age, y = `% of Subjects`, group = `Color Term`, colour = `Color Term`)) +
  facet_wrap(~`Chip ID`) +
  geom_line(size = 1) +
  geom_line(data = filter(naming_data_combined, 
                          Age < 18 & `Color Term` == 'Spanish Term'), 
            linetype = 2, size = 1) + 
  geom_point(data = filter(naming_data_combined, 
                          Age < 18 & `Color Term` == 'Spanish Term'), 
            shape = 17, size = 3) + 
  geom_point( size=3) +
  geom_point(data = filter(naming_data_combined, Age >= 18), size=3) +
  scale_y_continuous(limits = c(-10,110), breaks = seq(0,100, by = 25)) + 
  scale_x_continuous(breaks = c(seq(6,12,2),18), labels = c(seq(6,12,2),'Adult')) +
  scale_colour_manual(name = "Color Term",values = shipibo_graph_colors) +
  theme_bw() + 
  theme(panel.grid = element_blank())

ggplotly(p)
spanish_chip_set_data <- color_chip_data %>% 
  filter(`#cnum` %in% spanish_chip_set$chip_id) %>%
  select(`#cnum`, `L*`, `a*`, `b*`, hex) %>%
  arrange(`#cnum`) %>%
  rename(`Chip ID` = `#cnum`, `Hex Code` = hex)

datatable(spanish_chip_set_data, rownames = FALSE, options = list(pageLength = nrow(spanish_chip_set_data))) %>%
  formatStyle(columns = "Hex Code",
              background = styleEqual(spanish_chip_set_data$`Hex Code`, spanish_chip_set_data$`Hex Code`))
spanish_string_spelling_list <- "`Amarillo` = c('amarilla'), `Azul` = c('azul'), `Blanco` = c('blanco'), `Celeste` = c('celeste'), `Gris` = c('gris'), `Joshin` = c('joshin'), `Joxo` = c('joxo'), `Kari` = c('carri'), `Marron` = c('marron'), `Morado` = c('bioleta', 'morado', 'violeta'), `Nai` = c('nai'), `Naranja` = c('naranja', 'naranjada', 'narranxa', 'naranjado', 'narango', 'naranjo'), `Oshne` = c('oxe'), `Panshin` = c('panshin'), `Plomo` = c('plomo'), `Pei` = c('pei'), `Poa` = c('pua'), `Rojo` = c('rojo'), `Rosa` = c('rosada', 'rosa', 'rosado'), `Verde` = c('verde'), `Wiso` = c('wiso'), `Yankon` = c('yankon'), `NA` = c(NA)" 

spanish_spelling_list <- eval(parse(text = paste0("c(",spanish_string_spelling_list,")")))

spanish_1st_response <- spanish_child_data %>%
  mutate(age = ifelse(is.na(age), as.numeric(as.character(edad)), as.numeric(as.character(age)))) %>%
  filter(task == 1) %>%
  mutate(response_1 = ifelse(tolower(response_1) %in% unlist(spanish_spelling_list), 
                                     tolower(response_1), NA)) %>%
  mutate(response_1 = eval( parse(text = gsub(pattern = "x", replacement = spanish_string_spelling_list, "forcats::fct_collapse(response_1, x)")))
         ) %>%
  mutate( age_ints = round(age/spanish_age_interval)*spanish_age_interval) %>%
  select(subj, age, age_ints, prompt, response_1) %>%
  split(.$age_ints) %>%
  map_df(function(x) {
    x %>%
      mutate(response_1 = as.character(response_1)) %>%
      spread(prompt, response_1, fill = 'No Response') %>%
      gather(key = 'prompt', value = 'response', -subj, -age, -age_ints) %>%
      group_by(age_ints, prompt, response) %>%
      summarise(n = n()) %>%
      group_by(age_ints, prompt) %>%
      mutate(perc = 100*n/sum(n), n_total = sum(n))
  }) %>% ungroup() %>%
  mutate(prompt = as.numeric(as.character(forcats::fct_collapse(prompt,
                                        `297` = c('AM'),
                                        `291` = c('AZ'),
                                        `274` = c('BL'),
                                        `46` = c('GR'),
                                        `325` = c('MRD'),
                                        `266` = c('MRN'),
                                        `312` = c('NG'),
                                        `121` = c('NR'),
                                        `245` = c('RJ'),
                                        `65` = c('RS'),
                                        `234` = c('VD'))))) %>%
  left_join(color_chip_hexes,
            by = c("prompt" = "#cnum")) %>%
  rename(Age = age_ints, `Chip ID` = prompt, `Color Term` = response, 
         `% of Subjects` = perc, `Hex Code` = hex) %>%
  filter(n_total >= 4)

Characterize consensus over the WCS map

p <- ggplot(spanish_1st_response, 
       aes(x = Age, y = `% of Subjects`, group = `Color Term`, colour = `Color Term`)) +
  facet_wrap(~`Chip ID`) +
  geom_line(size = 1) +
  geom_point(size=3) +
  scale_y_continuous(limits = c(-10,110), breaks = seq(0,100, by = 25)) + 
  scale_colour_manual(name = "Color Term",values = spanish_graph_colors) +
  theme_bw() + 
  theme(panel.grid = element_blank())

ggplotly(p)

How does bilingualism factor into adult responses?

Compare Spanish versus Shipibo responses in children